Pipelining (as an aside to this section)
Pipeline(steps=[...])
- where `steps` is a list of (name, estimator) pairs through which the data is passed in order. Parameters of an individual step can then be supplied as a dictionary whose keys have the form `<step name>__<parameter>` and whose values are the settings to use. For example, here we do a transformation (SelectKBest) and a classification (SVC) all at once in a pipeline we set up.
See a full example here
Note: If you wish to perform multiple transformations on the same data and combine their outputs in your pipeline, try FeatureUnion.
In [ ]:
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship the old module
    from sklearn.cross_validation import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.datasets import load_iris

iris = load_iris()
X, y = iris.data, iris.target
# hold out 30% of the samples so the model can be evaluated on unseen data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# a feature selection instance: keep the 2 best features according to chi2
selection = SelectKBest(chi2, k=2)

# classification instance
clf = SVC(kernel='linear')

# make a pipeline: each step is a (name, estimator) pair, applied in order
pipeline = Pipeline([("feature selection", selection), ("classification", clf)])

# train the model on the training split only; fitting on the full X, y would
# leak the test samples into training and make predictions on X_test meaningless
pipeline.fit(X_train, y_train)
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
def plot_fit(X_train, y_train, X_test, y_pred, xlim=(0, 1), ylim=(-2, 2)):
    """Plot model predictions as a line over a scatter of the training samples.

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets, drawn as a scatter plot ("Samples").
    X_test, y_pred
        Evaluation inputs and the corresponding predictions, drawn as a
        line ("Model").
    xlim, ylim
        ``(low, high)`` axis limits. The defaults reproduce the fixed
        window this function originally used.
    """
    plt.plot(X_test, y_pred, label="Model")
    #plt.plot(X_test, fun, label = "Function")
    plt.scatter(X_train, y_train, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim(xlim)
    plt.ylim(ylim)
    # the labels given above are only shown once a legend is drawn
    plt.legend()
In [ ]:
import numpy as np
# Run the test split through the fitted pipeline (feature selection, then
# the classifier) to obtain one predicted label per row of X_test.
y_pred = pipeline.predict(X_test)
# The plotting call below is left disabled.
# NOTE(review): presumably because plot_fit draws a single x axis while
# X_test has several feature columns -- confirm before re-enabling.
#plot_fit(X_train, y_train, X_test, y_pred)
In [ ]:
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:  # older scikit-learn releases only ship the old module
    from sklearn.grid_search import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

poly = PolynomialFeatures(include_bias=False)
lm = LinearRegression()

# polynomial expansion of the features followed by a linear fit
pipeline = Pipeline([("polynomial_features", poly),
                     ("linear_regression", lm)])

# grid keys follow the "<step name>__<parameter>" convention
param_grid = dict(polynomial_features__degree=list(range(1, 30, 2)))
# search over `normalize` only when this LinearRegression exposes it;
# an unknown parameter name would make the grid search fail
if "normalize" in lm.get_params():
    param_grid["linear_regression__normalize"] = [False, True]

grid_search = GridSearchCV(pipeline, param_grid=param_grid)

# estimators expect a 2-D (n_samples, n_features) array: add a feature axis
# to 1-D input only, since doing so on an already 2-D X would produce a
# 3-D array that fit() rejects
X_2d = X[:, np.newaxis] if X.ndim == 1 else X
grid_search.fit(X_2d, y)
print(grid_search.best_params_)
Created by a Microsoft Employee.
The MIT License (MIT)
Copyright (c) 2016 Micheleen Harris